package org.tzeng.glwj.datam;

import org.tzeng.glwj.datag.DataSet;
import org.tzeng.glwj.datag.GenerateDataSet;
import org.tzeng.glwj.datag.Item;

import java.util.ArrayList;

/**
 * Pre-processing helpers for a generated {@link DataSet} of interval-tagged items, plus a
 * small demo driver.
 *
 * <p>Each {@link Item} carries an integer interval ({@code getStart()}/{@code getEnd()}) and a
 * string content. The helpers widen the intervals, optionally drop items that overlap no other
 * item, and print, per base character, the merged intervals of the items containing it.
 */
public class PreDeal {
	/**
	 * Demo entry point: generates a data set, prints it, widens every interval, prints it
	 * again and finally prints the merged intervals for each base character.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		GenerateDataSet generate = new GenerateDataSet();

		// Parameters of the experiment.
		int contentLength = 4; // limit on the content length of each item
		int charNum = 6; // number of base characters items are built from (a-z, at most 26)
		int lowLimit = 20, highLimit = 500; // lower and upper bound of the interval range
		int itemNum = 10; // limit on the number of items
		double ratio = 0.1; // interval expansion factor

		// Generate the data set.
		DataSet dataSet = generate.GetDataSet(contentLength, charNum, lowLimit, highLimit, itemNum);

		System.out.println("--------------------范围扩展前-------------------");
		ArrayList<Item> items = dataSet.getDatas();
		printItems(items);

		// Widen the intervals in place; the same list is printed again afterwards.
		extendArea(dataSet, ratio);

		System.out.println("--------------------范围扩展后-------------------");
		printItems(items);

		getFrequent(dataSet, charNum, lowLimit, highLimit);
	}

	/** Prints one line per item in the form {@code content<TAB>: <TAB>[start, end]}. */
	private static void printItems(ArrayList<Item> items) {
		for (Item item : items) {
			System.out.println(item.getContent() + "\t: \t[" + item.getStart() + ", " + item.getEnd() + "]");
		}
	}

	/**
	 * Builds a new data set containing copies of only those items whose interval overlaps the
	 * interval of at least one <em>other</em> item of {@code initDataSet}.
	 *
	 * <p>Two intervals are considered disjoint when one ends at or before the start of the
	 * other, so intervals that merely touch do not count as overlapping. The input data set is
	 * not modified. An empty input yields an empty result.
	 *
	 * @param initDataSet the data set to filter
	 * @return a new data set holding copies of the overlapping items, in their original order
	 */
	public static DataSet filterOperation(DataSet initDataSet) {
		DataSet dataSet = new DataSet();
		// NOTE(review): relies on a fresh DataSet exposing a live, non-null list via
		// getDatas(), exactly as the previous implementation did - confirm in DataSet.
		ArrayList<Item> kept = dataSet.getDatas();
		ArrayList<Item> items = initDataSet.getDatas();
		for (int i = 0; i < items.size(); i++) {
			if (overlapsAnother(items, i)) {
				Item cur = items.get(i);
				kept.add(new Item(cur.getStart(), cur.getEnd(), cur.getContent()));
			}
		}
		return dataSet;
	}

	/** Returns whether the item at {@code index} overlaps any item at a different index. */
	private static boolean overlapsAnother(ArrayList<Item> items, int index) {
		int curStart = items.get(index).getStart();
		int curEnd = items.get(index).getEnd();
		for (int j = 0; j < items.size(); j++) {
			if (j == index) {
				// An item trivially overlaps itself; comparing it would keep every item.
				continue;
			}
			Item other = items.get(j);
			boolean disjoint = curEnd <= other.getStart() || curStart >= other.getEnd();
			if (!disjoint) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Widens the interval of every item in place: the start is multiplied by
	 * {@code (1 - ratio)} and the end by {@code (1 + ratio)}, each truncated to {@code int}.
	 *
	 * <p>The start is not clamped, so a {@code ratio} greater than 1 produces a negative start
	 * for a positive original start.
	 *
	 * @param initDataSet the data set whose items are modified
	 * @param ratio       the expansion factor applied to both interval bounds
	 */
	public static void extendArea(DataSet initDataSet, double ratio) {
		for (Item item : initDataSet.getDatas()) {
			double newStart = item.getStart() * (1 - ratio);
			double newEnd = item.getEnd() * (1 + ratio);
			item.setStart((int) newStart);
			item.setEnd((int) newEnd);
		}
	}

	/**
	 * For each of the first {@code charNum} lowercase letters starting at {@code 'a'}, collects
	 * the intervals of all items whose content contains that letter and prints their merged
	 * intervals via {@link #mergePrint(ArrayList, int, int)}.
	 *
	 * @param initDataSet the data set to scan; it is not modified
	 * @param charNum     how many letters, starting at {@code 'a'}, to examine
	 * @param lowLimit    lower bound of the interval range, forwarded to {@code mergePrint}
	 * @param highLimit   upper bound of the interval range, forwarded to {@code mergePrint}
	 */
	public static void getFrequent(DataSet initDataSet, int charNum, int lowLimit, int highLimit) {
		ArrayList<Item> items = initDataSet.getDatas();
		for (int i = 0; i < charNum; i++) {
			char symbol = (char) ('a' + i);
			ArrayList<Item> matches = new ArrayList<Item>();
			for (Item item : items) {
				if (item.getContent().indexOf(symbol) != -1) {
					// Copy the interval and label it with the single character being examined.
					Item copy = new Item();
					copy.setStart(item.getStart());
					copy.setEnd(item.getEnd());
					copy.setContent(" " + symbol);
					matches.add(copy);
				}
			}
			mergePrint(matches, lowLimit, highLimit);
		}
	}

	/**
	 * Merges the closed integer intervals {@code [start, end]} of {@code items} and prints the
	 * resulting maximal runs of covered positions.
	 *
	 * <p>The output starts with the content of the first item followed by {@code ": "}; each
	 * merged run is then printed as {@code start: S end: E} on its own line (the first run
	 * shares the line with the label). For an empty list only a "does not exist" message is
	 * printed. Items whose end is smaller than their start cover nothing.
	 *
	 * <p>The working window initially spans {@code [lowLimit * 0.5, highLimit * 1.5]} and is
	 * widened as needed so that every item fits, which means intervals outside the nominal
	 * window no longer cause an exception.
	 *
	 * @param items     the items whose intervals are merged; must not be {@code null}
	 * @param lowLimit  nominal lower bound of the interval range before expansion
	 * @param highLimit nominal upper bound of the interval range before expansion
	 */
	public static void mergePrint(ArrayList<Item> items, int lowLimit, int highLimit) {
		if (items.isEmpty()) {
			System.out.println("  --不存在！--");
			return; // nothing to merge; reading items.get(0) below would throw
		}

		// The intervals have been expanded, so start from a widened window and then grow it
		// to include every non-empty interval that is actually present.
		int windowLow = (int) (lowLimit * 0.5);
		int windowHigh = (int) (highLimit * 1.5);
		for (Item item : items) {
			if (item.getStart() <= item.getEnd()) {
				windowLow = Math.min(windowLow, item.getStart());
				windowHigh = Math.max(windowHigh, item.getEnd());
			}
		}

		// covered[k] is true when position (k + windowLow) lies inside at least one interval.
		boolean[] covered = new boolean[Math.max(0, windowHigh - windowLow + 1)];
		for (Item item : items) {
			for (int pos = item.getStart(); pos <= item.getEnd(); pos++) {
				covered[pos - windowLow] = true;
			}
		}

		System.out.print(items.get(0).getContent() + ": ");

		boolean printedAny = false;
		int runStart = -1; // index where the current run began, or -1 when outside a run
		// Iterate one step past the end so that a run touching the last cell is flushed too.
		for (int i = 0; i <= covered.length; i++) {
			boolean on = i < covered.length && covered[i];
			if (on) {
				if (runStart < 0) {
					runStart = i;
				}
			} else if (runStart >= 0) {
				System.out.println("start: " + (runStart + windowLow) + " end: " + (i - 1 + windowLow));
				runStart = -1;
				printedAny = true;
			}
		}
		if (!printedAny) {
			// Terminate the label line when no interval covered anything.
			System.out.println();
		}
	}

}
